#	File name: PRQ-Replication

#	Author: Ryan Black and Ryan Owens

# 	Note: This file replicates the empirical results presented in our article on the influence of former law clerks. It also contains a bit of code used to make the figure showing the parameter estimates that appears in the online supplement. See the Stata code file for other replication code for the online supplement.

##########################
#### BEGIN CODE USED #####
##########################

library(cem)
library(MASS)
library(readstata13)
library(car)
library(lmtest)

od <- read.dta13("~/Desktop/PRQ-Replication.dta", convert.underscore = F, convert.factor = F)
od$clerkCat <- as.factor(od$clerkCat)

# Identify matching variables
match.vars <- c("attyOAexp", "againstOAExp", "statusNet", "amiciSupport", "amiciAgainst", "attyPet", "attyMale", "ideoCompat", "strongOpp", "helpOSG", "oppFLC")

# Convenience: Make a smaller subset to do the CEM on
small.data <- subset(od, subset = is.na(clerkCat) == F, select = c(match.vars, "winvote", "clerkCat"))

# Identify which variables should not be included in the imbalance calculation
todrop <- c("winvote", "clerkCat")

# Perform the CEM
cem.res  <- cem(treatment = "clerkCat", data = small.data, drop = todrop)

# Estimate logit model on the CEM data
cem.logit <- glm(data = small.data, winvote ~ clerkCat + attyOAexp + againstOAExp + statusNet + amiciSupport + amiciAgainst + attyPet + attyMale + ideoCompat + strongOpp + helpOSG + oppFLC, family = binomial(link = "logit"), weights = cem.res$w)

# Plot of regression coefficients (for supplement)
vn <- c("Constant", "Non-Sitting Justice", "Sitting Justice", "Employing Justice", "Attorney Experience", "Opponent Experience", "Resource Advantage", "Amicus Support", "Amicus Opposition", "Petitioner Attorney", "Male Attorney", "Ideological Congruence", "Lower Dissent", "OSG Position", "Opponent Former Clerk")
v2p <- array(data = NA, dim = c(length(cem.logit$coefficients), 5))
v2p[,1] <- cem.logit$coefficients
for(C in 1:length(cem.logit$coefficients)){
	se <- sqrt(diag(summary(cem.logit)$cov.scaled))[C]
	v2p[C,5] <- se
	v2p[C,2] <- v2p[C,1]/se
	v2p[C,3] <- v2p[C,1]-1.96*se
	v2p[C,4] <- v2p[C,1]+1.96*se	
}
po <- nrow(v2p):1
pdf(file = "~/desktop/Logit_Res.pdf", height = 9, width = 7)
par(mar = c(5,11,1,1))
plot(x = quantile(v2p[, c(3:4)], c(0,1)), y = c(1, nrow(v2p)), type = "n", xlab = "Coefficient", ylab = "", axes = F)
axis(1)
axis(2, at = 1:nrow(v2p), labels = rev(vn), las = 2, font = 2)
axis(2, at = (1:nrow(v2p))-0.35, labels = paste(rev(format(round(v2p[,1], 3), nsmall = 3)), " (", rev(format(round(v2p[,2],2), nsmall = 2, trim = T)), ")", sep = ""), las = 2, tick = F)
for(C in 1:nrow(v2p)){
	lines(c(v2p[C,3], v2p[C,4]), c(po[C], po[C]), lwd = 1.25)
	points(v2p[C,1], po[C], pch = 16, cex = 1.75)
}
abline(v = 0, lty = 2)
dev.off()

# Calculate imbalance for the pre and post data using random cutpoint profiles. Be warned that the pre-imbalance L1 profile command takes a minute or two to run depending on how fast your computer is.
set.seed(01081982)
pre.imb <- L1.profile(group = small.data$clerkCat, data = small.data, drop = todrop, plot = F, M = 500)
post.imb <- L1.profile(group = small.data$clerkCat, data = small.data, drop = todrop, plot = F, useCP = pre.imb$CP, weights = cem.res$w)
pre.imb$medianL1
post.imb$medianL1
(pre.imb$medianL1-post.imb$medianL1)/pre.imb$medianL1

# Draw some betas from multivariate normal for simulation prep
set.seed(01081982)
betas <- mvrnorm(n = 500, mu = t(coef(cem.logit)), Sigma = summary.glm(cem.logit)$cov.unscaled)
ref <- array(data = NA, dim = c(4, dim(betas)[1]))

# Simulations with observed values. Take the matched data and utilize the same CEM weights that we used for the logistic regression, rounding the weights up to the next biggest integer value. The observed values simulations will take each observation in the data and loop across the four values of the justice-attorney relationship variable. For each of those counterfactuals, loop across 500 random draws from the distribution of coefficients taken above. We're doing 3274 rows x 4 counterfactuals x 500 draws ≈ 6.5 million calculations, so this will also take a few seconds to run.
wc <- ceiling(cem.res$w)
ovr.data <- coredata(small.data)[rep(seq(nrow(small.data)), wc),]
ovr <- array(data = NA, dim = c(nrow(ovr.data), 4, dim(betas)[1]))

# For each observation, iteratively replace the clerk-justice relationship dummy variable and calculate the 500 predicted values from the random draw of coefficients. Store these values in an array (to be used below to calculate differences across counterfactuals).
for(I in 1:nrow(ovr.data)){
	X <- c(1, rep(0, length(cem.logit$coefficients)-1))
	X[5:length(cem.logit$coefficients)] <- unlist(ovr.data[I, c(1:(dim(ovr.data)[2]-length(todrop)))])
	for(D in 1:dim(betas)[1]){ovr[I,1,D] <- plogis(X %*% betas[D,])}
	for(C in 2:4){
		X <- c(1, rep(0, length(cem.logit$coefficients)-1))
		X[5:length(cem.logit$coefficients)] <- unlist(ovr.data[I, c(1:(dim(ovr.data)[2]-length(todrop)))])
		X[C] <- 1
		for(D in 1:dim(betas)[1]){ovr[I,C,D] <- plogis(X %*% betas[D,])}
	}
}

# Calculate summaries of these observed value simulations for plotting in Figure 1
res <- array(data = NA, dim = c(4, 3))
for(I in 1:4){
	xbar <- mean(ovr[,I,])
	rse <- sd(ovr[,I,])/sqrt(dim(betas)[1])
	res[I,2] <- xbar
	res[I,1] <- xbar-1.96*rse
	res[I,3] <- xbar+1.96*rse
}

# Make Figure 1
pdf(file = "~/desktop/ClerkWinProb.pdf", height = 6, width = 8)
par(mar = c(5,5,4,1))
Z <- 0.25
ypv <- quantile(res, c(0,1))
ypv[2] <- ypv[2]+0.01
plot(x = c(1-Z,4+Z), y = ypv, type = "n", xlab = "Attorney-Justice Background", ylab = "Probability Justice Votes for Attorney", main = "Probability Justice Votes for Attorney", axes = F)
abline(h = seq(0,1,.05), lty = 2, lwd = 0.75, col = "gray")
for(I in 1:4){lines(c(I,I), c(res[I,1], res[I,3]), lwd = 0.5, col = "black")}
points(1:4, res[,2], pch = 19, cex = 1.69)
axis(1, at = 1:4, labels = c("Never Clerked\n", "Clerked\nNon-Sitting Justice", "Clerked\nSitting Justice", "Clerked\nEmploying Justice"), tick = F)
axis(2, las = 2)
dev.off()

# Look at difference between clerked, sitting justice vs. never clerked. 
sit.v.never <- ovr[,3,]-ovr[,1,]
svn.ovd <- c()
for(D in 1:dim(betas)[1]){svn.ovd[D] <- mean(sit.v.never[,D])}
2*(1-(sum(sign(svn.ovd) == 1)/length(svn.ovd)))

# Look at difference between clerked, sitting justice vs. clerked, non-sitting
sit.v.nonsit <- ovr[,3,]-ovr[,2,]
svns.ovd <- c()
for(D in 1:dim(betas)[1]){svns.ovd[D] <- mean(sit.v.nonsit[,D])}
2*(1-(sum(sign(svns.ovd) == 1)/length(svns.ovd)))



########################
#### END CODE USED #####
########################
